Metabolomics Data Analysis for Porites Bleaching 2019 Experiment

Metabolite Polarity Selection

With untargeted LC-MS, metabolites were detected under both negative and positive polarities. To prevent double counting metabolites in my analysis, I took the mean of each polarities across all samples and selected the metabolite that had the strongest ion intensity under that polarity.

#Read in required libraries
library("reshape")
#library(plyr)
library("dplyr")
library("tidyverse")
library("ggplot2")
library("arsenal")
library("Rmisc")
library(gridExtra)
library(ggpubr)
library(factoextra)
library(ropls)
library(mixOmics)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(lme4)
library(lmerTest)
library(car)
library(effects)
library(ggfortify)
library(cowplot)
library(vegan)
library(corrr)
library(ggcorrplot)
library(GGally)
library(broom)
library(cowplot)
library(RVAideMemoire)
library(arsenal)
library(patchwork)
library(tidyr)

#Import data

sample.info <- read.csv("../data/Metabolomics/Porites-Kevin_SampleInfo.csv")
peak.pos <- read.csv("../data/Metabolomics/Peaks_Pos.csv")
peak.neg <- read.csv("../data/Metabolomics/Peaks_Neg.csv")

#Removing unncessesary columns

peak.pos2 <- peak.pos[ -c(1:9)] 
peak.pos3 <- peak.pos2[ -c(2:9)] # if we need the blanks, change the 9 to 6

peak.neg2 <- peak.neg[ -c(1:9)] 
peak.neg3 <- peak.neg2[ -c(2:9)] # if we need the blanks, change the 9 to 6

#### Selecting polarity based on higher signal intensity ###

#obtain row means 
pos.mean <- data.frame(ID=peak.pos3[,1], Means=rowMeans(peak.pos3[,-1]))
neg.mean <- data.frame(ID=peak.neg3[,1], Means=rowMeans(peak.neg3[,-1]))

# Comparing polarity means of each metabolite, x = pos, y = neg 
cmp <- comparedf(pos.mean, neg.mean, by = "ID",
                 tol.factor = "labels")        # match only factor labels

n.diffs(cmp) #58 shared metabolites
## [1] 58
list.diffs <- as.data.frame(do.call(cbind, diffs(cmp))) #creating a dataframe with shared metabolites as a list

df.diffs <- data.frame(matrix(unlist(list.diffs), nrow=n.diffs(cmp), byrow=F),stringsAsFactors=FALSE) #converting list to dataframe

names(df.diffs)[1:7] <- c("var.x", "var.y", "ID", "values.x", "values.y", "row.x", "row.y") #changing column names

i <- c(4, 5) #specifying values column
df.diffs[ , i] <- apply(df.diffs[ , i], 2,            # changing values column to numeric
                        function(x) as.numeric(as.character(x)))

df.diffs$selected.pol <-  ifelse(df.diffs$values.x > df.diffs$values.y, 'x', 
                                 ifelse(df.diffs$values.x < df.diffs$values.y, 'y', 'tie')) #selecting polarity with highest mean

x.diff.keep <- df.diffs %>% 
  filter(selected.pol == "x") #extracting x selection

y.diff.keep <- df.diffs %>%
  filter(selected.pol == "y") #extracting y selection

x.all.keep <- peak.pos3[!(peak.pos3$compound %in% y.diff.keep$ID),] #removing rows where the metabolite is higher in neg df
y.all.keep <- peak.neg3[!(peak.neg3$compound %in% x.diff.keep$ID),] #removing rows where the metabolite is higher in pos df


# Re-shaping dataset 
peak.pos4<- melt(x.all.keep, id= "compound") #melting dataset 
peak.neg4<- melt(y.all.keep, id= "compound") #melting dataset 

# adding polarity 
peak.pos4$polarity <- "positive"
peak.neg4$polarity <- "negative"

# Binding positive and negative datasets together
peak.all <- rbind(peak.pos4, peak.neg4)

# Renaming column 
names(peak.all)[2] <- "Sample.ID"
names(peak.all)[3] <- "Raw.IonCount"

# Merging weight sample info
peak.all2 <- merge(peak.all, sample.info, by = "Sample.ID")

Normalization

#Normalization by weight

peak.all2$Raw.IonCount <- as.numeric(as.character(peak.all2$Raw.IonCount))
peak.all2$Norm.IonCount <- peak.all2$Raw.IonCount / peak.all2$Weight.mg

#Selecting columns of interest
peak.all3 <- peak.all2 %>% dplyr::select(Sample.ID, compound, Fragment.ID, Time, Treatment, Norm.IonCount)

#Reformatting dataframe so compounds are listed as column headers
peak.all4 <- peak.all3 %>% spread(compound, Norm.IonCount)

#adding 1000 to all variable to account for 0 values and log normalization 
#Log normalization (https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r)

peak.all5 <- log((peak.all4[5:184] + 1000), 2)

norm.data <- cbind(peak.all4[1:4], peak.all5)

Grouped PCA

Code inspired by: - http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/118-principal-component-analysis-in-r-prcomp-vs-princomp/ - https://github.com/urol-e5/timeseries/blob/master/time_series_analysis/integration_biological.Rmd line 2465

scaled_pca <-prcomp(norm.data[c(5:184)], scale=TRUE, center=TRUE)
fviz_eig(scaled_pca)

coral_info<-norm.data[c(3,4)]

pca_data<- scaled_pca%>%
  augment(coral_info)%>%
  group_by(Time, Treatment)%>%
  mutate(PC1.mean = mean(.fittedPC1),
         PC2.mean = mean(.fittedPC2))

pca.centroids<- pca_data%>% 
  dplyr::select(Time, Treatment, PC1.mean, PC2.mean)%>%
  dplyr::group_by(Time, Treatment)%>%
  dplyr::summarise(PC1.mean = mean(PC1.mean),
            PC2.mean = mean(PC2.mean))


#Examine PERMANOVA results.  

# scale data
vegan <- scale(norm.data[c(5:184)])

# PerMANOVA 
permanova<-adonis(vegan ~ Time*Treatment, data = norm.data, method='eu')
z_pca<-permanova$aov.tab
z_pca
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Time            2     875.0  437.52  2.8045 0.11048  0.001 ***
## Treatment       2     696.0  348.02  2.2308 0.08788  0.001 ***
## Time:Treatment  4     732.7  183.17  1.1741 0.09251  0.157    
## Residuals      36    5616.3  156.01         0.70912           
## Total          44    7920.0                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assemble plot with background points

#1. make plot with dots

#adding percentages on axis

names(pca_data)[4] <- "PCA1"
names(pca_data)[5] <- "PCA2"
percentage <- round((scaled_pca$sdev^2) / sum((scaled_pca$sdev^2)) * 100, 2)
percentage <- paste( colnames(pca_data[4:50]), "(", paste(as.character(percentage), "%", ")", sep="") )

#setting up data to add polygons
pca_data$Time.Treatment <- paste(pca_data$Time, pca_data$Treatment)
find_hull <- function(pca_data) pca_data[chull(pca_data$PCA1, pca_data$PCA2), ]
hulls <- ddply(pca_data, "Time.Treatment", find_hull)


PCA<-ggplot(pca_data, aes(PCA1, PCA2, color=Treatment)) + 
  geom_point(size = 4, alpha=0.2, aes(shape = Time))+
  scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00")) + 
  scale_shape_manual(values=c(15, 17, 19)) +
  theme_classic()+
  ylim(-10,10)+
  xlim(-15,15)+
  ylab(percentage[2])+
  xlab(percentage[1])+
  geom_text(x=9, y=-8.25, label=paste("p(Time)=", z_pca$`Pr(>F)`[1]), size=7, color=ifelse(z_pca$`Pr(>F)`[1] < 0.05, "black", "darkgray")) + 
  geom_text(x=9, y=-9, label=paste("p(Treatment)=", z_pca$`Pr(>F)`[2]), size=7, color=ifelse(z_pca$`Pr(>F)`[2] < 0.05, "black", "darkgray")) + 
  geom_text(x=9, y=-9.75, label=paste("p(Time x Treatment)=", z_pca$`Pr(>F)`[3]), size=7, color=ifelse(z_pca$`Pr(>F)`[3] < 0.05, "black", "darkgray")) + 
  theme(legend.text = element_text(size=17), 
        legend.position="none",
        plot.background = element_blank(),
        legend.title = element_text(size=22), 
        plot.margin = margin(1, 1, 1, 1, "cm"),
        axis.text = element_text(size=27), 
        title = element_text(size=25, face="bold"), 
        axis.title = element_text(size=30))

#Add centroids  

#2. add centroids 
PCAcen<-PCA +  geom_polygon(data = hulls, alpha = 0.2, aes(color = Treatment, fill = Treatment, lty = Time)) +
  geom_point(aes(x=PC1.mean, y=PC2.mean,color=Treatment, shape = Time), data=pca.centroids, size=4, show.legend=FALSE) + 
  scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) +
  scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) + 
  scale_shape_manual(values=c(15, 17, 19)) +
  theme(legend.text = element_text(size=17), 
        legend.position=c(0.9,0.8),
        plot.background = element_blank(),
        legend.title = element_text(size=22), 
        plot.margin = margin(1, 1, 1, 1, "cm"),
        axis.text = element_text(size=27), 
        title = element_text(size=25, face="bold"), 
        axis.title = element_text(size=30))



#Add segments

#3. add segments
segpoints<-pca.centroids%>%
  gather(variable, value, -(Time:Treatment)) %>%
  unite(temp, Time, variable) %>%
  spread(temp, value)

PCAfull<-PCAcen + 
  geom_segment(aes(x = Day0_PC1.mean, y = Day0_PC2.mean, xend = Day37_PC1.mean, yend = Day37_PC2.mean, colour = Treatment), data = segpoints, size=2, show.legend=FALSE) +
  geom_segment(aes(x = Day37_PC1.mean, y = Day37_PC2.mean, xend = Day52_PC1.mean, yend = Day52_PC2.mean, colour = Treatment), data = segpoints, size=2, arrow = arrow(length=unit(0.5,"cm")), show.legend=FALSE); PCAfull

ggsave(filename="../output/Metabolomics/FullPCA_metabolomics.pdf", plot=PCAfull, dpi=300, width=10, height=10, units="in")

PLS-DA on Timepoint 3 between each pair

Control vs Bleached

#Control vs Bleached

norm.data_2 <- column_to_rownames(norm.data, 'Sample.ID')

D52_CvsB <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Mortality_Hot")

D52_CvsB_clean <- na.omit(D52_CvsB)

#assigning datasets 
X <- D52_CvsB[4:183]
Y <- as.factor(D52_CvsB$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Control")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp2
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp3
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp4
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp5
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
## 
## $Comp6
##                                 AUC  p-value
## Bleached_Hot vs Control_Ambient   1 0.009023
#VIP Extraction
D52_CvsB_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsB_VIP_DF <- as.data.frame(D52_CvsB_VIP[["tab"]])
D52_CvsB_VIP_DF
##                                           VIP
## N-acetyl-glutamine                  1.8475464
## Ornithine                           1.8471462
## Arginine                            1.8447445
## 4-Guanidinobutanoic acid            1.8313272
## Nicotinamide riboside               1.8236378
## NADPH (SIM)                         1.8188263
## Homoarginine                        1.7686490
## o-acetyl-L-serine                   1.7272228
## Acetyl proline                      1.7261229
## Homocitrulline                      1.7116100
## Itaconic acid                       1.6527043
## NG-dimethyl-L-arginine              1.6502903
## Ribose phosphate                    1.6390388
## Uric acid                           1.5753606
## Lysine                              1.5659648
## Hypoxanthine                        1.5546614
## 2-Aminoethylphosphonate             1.5537645
## Glycine                             1.5474496
## Arginine-Valine                     1.5256292
## Histidine                           1.5202824
## Acetyllysine                        1.5116372
## Glutamine                           1.5116076
## Asparagine                          1.4973637
## O-Phosphorylethanolamine            1.4952363
## Dihydroorotate                      1.4941539
## Valine                              1.4804471
## Cytosine                            1.4471037
## Carnosine                           1.4413172
## Pyrroline-5-carboxylic acid         1.4120153
## O-Butanoylcarnitine                 1.3923286
## Aminoadipic acid                    1.3922921
## Alanine                             1.3771535
## 5-Methoxytryptamine                 1.3499814
## Stearic acid                        1.3353469
## Phosphocholine                      1.3054181
## Nicotinate                          1.2877107
## Betaine                             1.2875509
## Isoleucine                          1.2811918
## Glucuronic acid                     1.2266900
## N-acetyl-glutamate                  1.2144519
## Phenylpropanolamine                 1.2115881
## Mandelic acid                       1.2103823
## Pyroglutamate                       1.2016750
## Deoxycytidine                       1.1982576
## L-Palmitoylcarnitine                1.1980581
## Citramalic acid                     1.1775553
## N-acetyl-L-ornithine                1.1149622
## Uridine                             1.1120195
## Phosphocholine (+HAc)               1.0945526
## Leucic acid                         1.0908325
## Glycerate                           1.0874723
## PosIS_Lysine-2H8                    1.0793108
## Glutathione disulfide               1.0486646
## 5-Methylcytosine                    1.0439816
## PosIS_Inosine-15N4                  1.0365273
## Aspartate                           1.0322076
## NegIS_Thymine-2H4                   1.0310881
## UDP-N-acetyl-glucosamine            1.0212216
## PosIS_Glutamine-2H5-15N2            1.0104041
## UMP                                 1.0069796
## NegIS_Glucose-2H7-13C6              1.0041126
## Sucrose                             0.9917345
## Lactate                             0.9905264
## Methionine sulfoxide                0.9896006
## Histamine                           0.9895645
## Mannose-6-phosphate                 0.9881803
## Hypotaurine                         0.9838006
## Adenine                             0.9764535
## Hydroxyproline                      0.9740385
## Phenylalanine                       0.9739488
## Glycerophosphocholine (+HAc)        0.9661476
## Deoxyguanosine                      0.9542160
## Cystathionine                       0.9532942
## CDP-Choline                         0.9495000
## PosIS_Alanine-2H4-15N               0.9461741
## N-Acetyl-L-alanine                  0.9449103
## Sorbitol                            0.9343901
## Arginine-Alanine                    0.9221628
## D-2-Aminobutyric acid               0.9156346
## Fructose                            0.9101266
## Glycerol-3-phosphate                0.9092518
## Acetyl-arginine                     0.9050694
## Spermine                            0.8955273
## PosIS_Serine-13C3-15N1              0.8938346
## NegIS_Glycine-13C2_15N1             0.8877996
## a-ketoglutarate                     0.8818213
## Indole                              0.8716574
## Glucose-6-phosphate                 0.8711653
## Malate                              0.8690751
## Glutathione                         0.8610566
## Succinate                           0.8597732
## O-Propanoylcarnitine                0.8574560
## Carbamoyl phosphate                 0.8367812
## Dihydroxybenzeneacetic acid         0.8330689
## Homocysteine                        0.8289893
## Aconitate                           0.8157140
## Leucine                             0.8091813
## Guanosine                           0.8030839
## Serine                              0.7987984
## Glutamate                           0.7968480
## NADP+ (SIM)                         0.7768044
## Methionine                          0.7650715
## Hydroxyphenyl pyruvate              0.7601818
## Methylphenyllactate                 0.7571953
## Adenosine                           0.7544527
## Tyrosine                            0.7396607
## Purine                              0.7352270
## UDP-D-Glucose                       0.7326338
## Tryptophan                          0.7324533
## Ribitol                             0.7316265
## Raffinose                           0.7276736
## Dihydroxyacetone phosphate          0.7271345
## NegIS_Lysine-2H8                    0.7235685
## Hippuric acid                       0.7192444
## 1 3-Dimethyluric acid               0.7148183
## Threonine                           0.7142418
## NegIS_Inosine-15N4                  0.7138902
## Arginine-Glutamine                  0.7105654
## 5- Methylthioadenosine              0.7020726
## NegIS_Glutamine-2H5-15N2            0.7013269
## Glycerophosphocholine               0.6996244
## Fructose-6-phosphate                0.6965501
## Xanthine                            0.6933256
## ADP-D-Glucose                       0.6915677
## 5-Hydroxytryptophan                 0.6899659
## Glutaryl-carnitine                  0.6840990
## N N N-Trimethyllysine               0.6822649
## Citrulline                          0.6768827
## Cellobiose                          0.6763358
## Carnitine                           0.6667547
## Acetyl glycine                      0.6642562
## Guanine                             0.6513430
## Pyruvate                            0.6501816
## Citrate                             0.6495698
## CDP-Choline (+HAc)                  0.6378132
## Cytidine                            0.6333613
## Uracil                              0.6319751
## NegIS_Serine-13C3-15N1              0.6252614
## Cystine                             0.6232665
## NADH (SIM)                          0.6171055
## CMP                                 0.6050429
## Tryptamine                          0.6003752
## NegIS_Alanine-2H4-15N               0.5994133
## Levulinic Acid                      0.5938777
## Inositol                            0.5833383
## O-Acylcarnitine                     0.5434109
## Ascorbic acid                       0.5392257
## Hydroxyoctanoic acid                0.5341602
## Tyramine                            0.5141836
## Thymidine                           0.5103889
## Lysine-Glutamine                    0.5080734
## GMP                                 0.4954510
## NegIS_Malate-2H3                    0.4914272
## NAD+ (SIM)                          0.4879187
## Threonic acid                       0.4846377
## Creatine                            0.4772946
## AMP                                 0.4387995
## ADP                                 0.4384280
## 1-Methylhistidine                   0.4363119
## 3-Phenylbutyric acid                0.4277624
## Inosine                             0.4138897
## O-Decanoyl-L-carnitine              0.4122824
## Trigonelline                        0.4057788
## Isocitrate                          0.3969700
## S-adenosyl-L-methionine             0.3743649
## 5-Phenylvaleric acid                0.3339220
## Pantothenate                        0.3294431
## Tridecanoic acid                    0.3086940
## gamma-amino butyrate                0.3030969
## Glucose                             0.2855454
## Taurine                             0.2816848
## Creatinine                          0.2743437
## Thiamine                            0.2639383
## Proline                             0.2521395
## Methyloxovaleric acid (Ketoleucine) 0.2401009
## Cholesteryl sulfate                 0.2177561
## NAD+                                0.1902808
## L-Octanoylcarnitine                 0.1842706
## 11a-Hydroxyprogesterone             0.1540240
## Androstenedione                     0.0000000

Control vs Partial Mortality

D52_CvsP <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Bleached_Hot")

D52_CvsP_clean <- na.omit(D52_CvsP)

#assigning datasets 
X <- D52_CvsP[4:183]
Y <- as.factor(D52_CvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples

#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Control")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                                  AUC  p-value
## Control_Ambient vs Mortality_Hot   1 0.009023
#VIP Extraction
D52_CvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsP_VIP_DF <- as.data.frame(D52_CvsP_VIP[["tab"]])
D52_CvsP_VIP_DF
##                                            VIP
## Glutamine                           1.95183295
## Asparagine                          1.92307540
## Alanine                             1.87884065
## Dihydroorotate                      1.87651653
## Ornithine                           1.87189680
## N-acetyl-glutamine                  1.85536266
## NADPH (SIM)                         1.85321221
## Arginine                            1.83891009
## Homoarginine                        1.83299432
## 2-Aminoethylphosphonate             1.83223482
## Glycine                             1.82048680
## 4-Guanidinobutanoic acid            1.81468206
## Pyrroline-5-carboxylic acid         1.79602119
## NG-dimethyl-L-arginine              1.77876897
## o-acetyl-L-serine                   1.74075672
## UMP                                 1.68516324
## Acetyl proline                      1.65987799
## Isoleucine                          1.64581615
## O-Phosphorylethanolamine            1.63963923
## Homocysteine                        1.63509267
## Cellobiose                          1.62905929
## Histidine                           1.60639451
## Lysine                              1.59132135
## Sucrose                             1.58575238
## Nicotinamide riboside               1.55778749
## Aminoadipic acid                    1.53723685
## Uric acid                           1.52895394
## Threonine                           1.51605093
## Malate                              1.51217197
## Phosphocholine                      1.46409593
## Citrate                             1.46139623
## Inosine                             1.45563425
## Hydroxyproline                      1.45342368
## Adenine                             1.44939478
## Raffinose                           1.43234915
## Carnosine                           1.41147563
## Betaine                             1.39147821
## O-Butanoylcarnitine                 1.38769651
## Fructose                            1.33808606
## Phosphocholine (+HAc)               1.28591820
## Glucose                             1.28499677
## Isocitrate                          1.27020929
## Proline                             1.25794777
## Cytidine                            1.21106737
## Homocitrulline                      1.20177569
## Acetyl-arginine                     1.20107779
## N-acetyl-L-ornithine                1.19666001
## Uridine                             1.18533810
## Tryptophan                          1.17126818
## Cytosine                            1.15921045
## Arginine-Valine                     1.14747517
## Glutamate                           1.12687089
## Valine                              1.12163203
## Cholesteryl sulfate                 1.12004264
## Glutathione disulfide               1.11067175
## Citramalic acid                     1.10601174
## Ribose phosphate                    1.09233844
## Phenylpropanolamine                 1.06538435
## Guanine                             1.03581087
## NegIS_Thymine-2H4                   1.01472122
## Leucine                             1.01267682
## Glucose-6-phosphate                 1.00433520
## Glucuronic acid                     0.96968000
## Methylphenyllactate                 0.96143956
## Methionine sulfoxide                0.95133404
## L-Octanoylcarnitine                 0.94847611
## PosIS_Lysine-2H8                    0.94657143
## Citrulline                          0.94430892
## Purine                              0.94203603
## NADP+ (SIM)                         0.94184557
## Aconitate                           0.93310342
## Leucic acid                         0.92692049
## 3-Phenylbutyric acid                0.90413651
## gamma-amino butyrate                0.88816741
## D-2-Aminobutyric acid               0.87772485
## Cystine                             0.87558238
## Succinate                           0.87147178
## UDP-N-acetyl-glucosamine            0.87017478
## Stearic acid                        0.86617973
## Pyruvate                            0.86364769
## Inositol                            0.85652167
## Glycerol-3-phosphate                0.84484992
## Aspartate                           0.84408829
## Hypotaurine                         0.84025624
## CMP                                 0.83502291
## NADH (SIM)                          0.82668099
## NegIS_Serine-13C3-15N1              0.82402052
## N N N-Trimethyllysine               0.79975425
## Arginine-Glutamine                  0.79583599
## Deoxycytidine                       0.78823107
## Glycerate                           0.78497521
## GMP                                 0.77161310
## Pyroglutamate                       0.76390084
## Thymidine                           0.75105288
## Trigonelline                        0.74956969
## Lysine-Glutamine                    0.74831304
## AMP                                 0.73590124
## Cystathionine                       0.72966252
## Serine                              0.72653631
## PosIS_Inosine-15N4                  0.72413524
## Indole                              0.72140592
## Glutaryl-carnitine                  0.71072692
## Androstenedione                     0.69970435
## NegIS_Glycine-13C2_15N1             0.69649511
## Arginine-Alanine                    0.68746153
## Mannose-6-phosphate                 0.68064189
## Itaconic acid                       0.66831077
## O-Decanoyl-L-carnitine              0.63358224
## Histamine                           0.62613960
## Hippuric acid                       0.62455488
## Glutathione                         0.61255589
## PosIS_Serine-13C3-15N1              0.61206081
## NegIS_Lysine-2H8                    0.60709076
## NegIS_Glucose-2H7-13C6              0.59617948
## 5- Methylthioadenosine              0.59133989
## Tyramine                            0.58033535
## Fructose-6-phosphate                0.57882260
## CDP-Choline                         0.56316116
## Phenylalanine                       0.54839701
## PosIS_Glutamine-2H5-15N2            0.54591353
## 5-Hydroxytryptophan                 0.54101181
## NegIS_Inosine-15N4                  0.53977976
## Methionine                          0.52388330
## Ascorbic acid                       0.50583181
## L-Palmitoylcarnitine                0.49659592
## Acetyl glycine                      0.49200437
## NegIS_Glutamine-2H5-15N2            0.47921951
## 11a-Hydroxyprogesterone             0.47652810
## NAD+ (SIM)                          0.46136351
## Levulinic Acid                      0.45898971
## Taurine                             0.45507119
## Hydroxyoctanoic acid                0.44963397
## Sorbitol                            0.42287919
## N-Acetyl-L-alanine                  0.41186675
## O-Propanoylcarnitine                0.41046357
## Dihydroxybenzeneacetic acid         0.41035866
## Creatinine                          0.40434257
## NegIS_Malate-2H3                    0.40325888
## Guanosine                           0.39786977
## Lactate                             0.39385549
## Hypoxanthine                        0.38550512
## S-adenosyl-L-methionine             0.37936958
## 1-Methylhistidine                   0.37019858
## Uracil                              0.36766055
## Adenosine                           0.36265274
## N-acetyl-glutamate                  0.34685003
## Xanthine                            0.33962026
## Spermine                            0.33394092
## UDP-D-Glucose                       0.32811615
## NegIS_Alanine-2H4-15N               0.32292368
## PosIS_Alanine-2H4-15N               0.31825653
## Hydroxyphenyl pyruvate              0.31578207
## Tyrosine                            0.31039143
## Creatine                            0.30861250
## Acetyllysine                        0.30800895
## 5-Methylcytosine                    0.30724032
## NAD+                                0.30428741
## 5-Phenylvaleric acid                0.29983757
## Carnitine                           0.27933570
## Tridecanoic acid                    0.27651233
## Tryptamine                          0.26354981
## 1 3-Dimethyluric acid               0.25801469
## Deoxyguanosine                      0.24832297
## 5-Methoxytryptamine                 0.24828789
## ADP-D-Glucose                       0.20810149
## Nicotinate                          0.19656018
## Glycerophosphocholine               0.19466556
## Pantothenate                        0.18802013
## ADP                                 0.18575476
## Dihydroxyacetone phosphate          0.17509571
## Carbamoyl phosphate                 0.16183222
## Methyloxovaleric acid (Ketoleucine) 0.15785538
## O-Acylcarnitine                     0.13936833
## Ribitol                             0.12097866
## Glycerophosphocholine (+HAc)        0.11271632
## CDP-Choline (+HAc)                  0.11117794
## Thiamine                            0.11116440
## Threonic acid                       0.10534756
## Mandelic acid                       0.10458903
## a-ketoglutarate                     0.03554707

Bleached vs Partial Mortality

D52_BvsP <- norm.data_2 %>%
  filter(Time == "Day52") %>%
  filter(Treatment != "Control_Ambient")

D52_BvsP_clean <- na.omit(D52_BvsP)

#assigning datasets 
X <- D52_BvsP[4:183]
Y <- as.factor(D52_BvsP$Treatment) 
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples


#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda)    # 2 Plot the samples

#plotVar(MyResult.plsda, cutoff = 0.7)    

plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Partial Mortality")

MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value

#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1

comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)

# component validation 
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
                     progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50

plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)

## $Comp1
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp2
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp3
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp4
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp5
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
## 
## $Comp6
##                               AUC  p-value
## Bleached_Hot vs Mortality_Hot   1 0.009023
#VIP Extraction
D52_BvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_BvsP_VIP_DF <- as.data.frame(D52_BvsP_VIP[["tab"]])
D52_BvsP_VIP_DF
##                                           VIP
## Asparagine                          2.3403317
## N-acetyl-glutamate                  2.2812787
## Stearic acid                        2.1681666
## Sucrose                             2.1212151
## Itaconic acid                       2.0733473
## Glutaryl-carnitine                  1.9873658
## Homocysteine                        1.9049851
## Inosine                             1.8635605
## Guanosine                           1.8447103
## Aconitate                           1.7704511
## Acetyllysine                        1.7544757
## Cellobiose                          1.7530563
## Glucose                             1.7031338
## Dihydroorotate                      1.6957906
## O-Propanoylcarnitine                1.6934506
## Malate                              1.6695875
## Isocitrate                          1.6628276
## Proline                             1.6479034
## Glycerate                           1.6355353
## L-Palmitoylcarnitine                1.6158436
## Threonine                           1.6026816
## Thymidine                           1.5252766
## Spermine                            1.5164722
## 5-Methylcytosine                    1.5032389
## Glycerophosphocholine (+HAc)        1.4996405
## Mandelic acid                       1.4993117
## Cholesteryl sulfate                 1.4804596
## Cytidine                            1.4662498
## UDP-D-Glucose                       1.4614837
## Hydroxyproline                      1.4573431
## Raffinose                           1.4304646
## Homoarginine                        1.4259474
## Hypoxanthine                        1.4115503
## L-Octanoylcarnitine                 1.4076587
## Sorbitol                            1.3965138
## Acetyl-arginine                     1.3949781
## Nicotinate                          1.3922016
## N-acetyl-glutamine                  1.3801435
## GMP                                 1.3371424
## Trigonelline                        1.3060575
## Pyrroline-5-carboxylic acid         1.3034550
## 3-Phenylbutyric acid                1.2917389
## 5-Methoxytryptamine                 1.2818365
## Mannose-6-phosphate                 1.2550524
## Carnitine                           1.2071169
## Nicotinamide riboside               1.2001203
## Purine                              1.1998143
## Adenine                             1.1881913
## Tryptophan                          1.1665866
## Pyroglutamate                       1.1528613
## NADPH (SIM)                         1.1516736
## Adenosine                           1.1150886
## Guanine                             1.1002273
## Carbamoyl phosphate                 1.0687537
## Cystine                             1.0677126
## NAD+ (SIM)                          1.0516237
## Ribose phosphate                    1.0435433
## Glycerophosphocholine               1.0368153
## Uric acid                           1.0299369
## Methylphenyllactate                 1.0271903
## Fructose                            1.0173638
## Hydroxyphenyl pyruvate              1.0148936
## Glycine                             0.9896683
## Histamine                           0.9811026
## 1 3-Dimethyluric acid               0.9778179
## Androstenedione                     0.9533351
## o-acetyl-L-serine                   0.9452371
## Dihydroxyacetone phosphate          0.9434353
## S-adenosyl-L-methionine             0.8953292
## Glutamate                           0.8855392
## Lactate                             0.8720782
## Tyramine                            0.8687068
## Inositol                            0.8599795
## AMP                                 0.8540432
## a-ketoglutarate                     0.8485973
## Uracil                              0.8481745
## Valine                              0.8418153
## Betaine                             0.8205602
## 11a-Hydroxyprogesterone             0.8131395
## Arginine                            0.8113272
## NegIS_Glucose-2H7-13C6              0.7961449
## Pyruvate                            0.7842410
## Ribitol                             0.7811872
## Methyloxovaleric acid (Ketoleucine) 0.7729055
## Citrulline                          0.7495753
## NegIS_Serine-13C3-15N1              0.7395845
## Fructose-6-phosphate                0.7351441
## Lysine                              0.7242599
## ADP-D-Glucose                       0.7221048
## Homocitrulline                      0.7028496
## 5- Methylthioadenosine              0.6952357
## NADH (SIM)                          0.6931631
## CDP-Choline                         0.6840125
## Levulinic Acid                      0.6818591
## Ornithine                           0.6713528
## Tyrosine                            0.6644111
## 2-Aminoethylphosphonate             0.6500598
## NegIS_Thymine-2H4                   0.6414871
## Dihydroxybenzeneacetic acid         0.6359765
## Aminoadipic acid                    0.6213054
## UDP-N-acetyl-glucosamine            0.6198763
## Xanthine                            0.6177301
## Hippuric acid                       0.6100370
## Phosphocholine                      0.6097056
## O-Decanoyl-L-carnitine              0.5894843
## NegIS_Alanine-2H4-15N               0.5892492
## UMP                                 0.5846745
## NG-dimethyl-L-arginine              0.5835716
## Glycerol-3-phosphate                0.5808181
## N N N-Trimethyllysine               0.5774171
## Phenylalanine                       0.5753140
## N-Acetyl-L-alanine                  0.5737060
## Hypotaurine                         0.5725657
## O-Acylcarnitine                     0.5639802
## Arginine-Alanine                    0.5564164
## 4-Guanidinobutanoic acid            0.5509073
## Glucuronic acid                     0.5505127
## Cytosine                            0.5477215
## Citrate                             0.5431613
## Threonic acid                       0.5373127
## NegIS_Inosine-15N4                  0.5344579
## Acetyl proline                      0.5192669
## gamma-amino butyrate                0.5172393
## Phenylpropanolamine                 0.5103872
## Phosphocholine (+HAc)               0.4990537
## Ascorbic acid                       0.4945664
## Creatine                            0.4862920
## 5-Phenylvaleric acid                0.4849747
## Isoleucine                          0.4816555
## NegIS_Glutamine-2H5-15N2            0.4770023
## Taurine                             0.4711647
## Indole                              0.4701378
## NegIS_Glycine-13C2_15N1             0.4598130
## Glutathione                         0.4589659
## Acetyl glycine                      0.4433496
## Leucine                             0.4285686
## CMP                                 0.4280790
## Uridine                             0.4254490
## Glutathione disulfide               0.4250539
## Tryptamine                          0.4182956
## Aspartate                           0.4137877
## Glucose-6-phosphate                 0.4126074
## Deoxyguanosine                      0.3999495
## PosIS_Inosine-15N4                  0.3987825
## Methionine                          0.3985404
## Histidine                           0.3981328
## Glutamine                           0.3903608
## PosIS_Alanine-2H4-15N               0.3831474
## O-Butanoylcarnitine                 0.3766952
## Tridecanoic acid                    0.3755690
## Citramalic acid                     0.3743930
## Arginine-Glutamine                  0.3742309
## NAD+                                0.3723728
## PosIS_Glutamine-2H5-15N2            0.3698439
## PosIS_Serine-13C3-15N1              0.3698128
## Lysine-Glutamine                    0.3674037
## O-Phosphorylethanolamine            0.3598021
## Pantothenate                        0.3553003
## Serine                              0.3483882
## NADP+ (SIM)                         0.3275083
## N-acetyl-L-ornithine                0.3227260
## 1-Methylhistidine                   0.3096755
## 5-Hydroxytryptophan                 0.3075385
## Succinate                           0.2951576
## CDP-Choline (+HAc)                  0.2906145
## Creatinine                          0.2831129
## Alanine                             0.2801036
## Methionine sulfoxide                0.2785114
## Hydroxyoctanoic acid                0.2580881
## Thiamine                            0.2533475
## Deoxycytidine                       0.2348238
## D-2-Aminobutyric acid               0.2333163
## ADP                                 0.2186992
## PosIS_Lysine-2H8                    0.1968858
## NegIS_Lysine-2H8                    0.1865058
## Arginine-Valine                     0.1658788
## Leucic acid                         0.1561995
## NegIS_Malate-2H3                    0.1359512
## Carnosine                           0.0000000
## Cystathionine                       0.0000000

VIP Comparisons for Final Timepoint

####### Overlaps for VIPs >1 #########

# Converting row names to column
D52_CvsB_VIP_table <- rownames_to_column(D52_CvsB_VIP_DF, var = "Metabolite")
D52_CvsP_VIP_table <- rownames_to_column(D52_CvsP_VIP_DF, var = "Metabolite")
D52_BvsP_VIP_table <- rownames_to_column(D52_BvsP_VIP_DF, var = "Metabolite")

# Filtering for VIP > 1
D52_CvsB_VIP_1 <- D52_CvsB_VIP_table %>% 
  filter(VIP >= 1)

D52_CvsP_VIP_1 <- D52_CvsP_VIP_table %>% 
  filter(VIP >= 1)

D52_BvsP_VIP_1 <- D52_BvsP_VIP_table %>% 
  filter(VIP >= 1)

D52_CvsB_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_CvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Control vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_BvsP_VIP_1 %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_point() +
  ylab("Metabolite") +
  xlab("VIP Score") +
  ggtitle("Bleached vs Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

### Compare for Venn Diagram (https://github.com/gaospecial/ggVennDiagram)

#Open this part in Repo

# nrow(D52_CvsB_VIP_1)
# nrow(D52_CvsP_VIP_1)
# 
# library("VennDiagram")
# 
# venn.x <- list(
#   D52_CvsB = sample(D52_CvsB_VIP_1$Metabolite), 
#   D52_CvsP = sample(D52_CvsP_VIP_1$Metabolite)
# )
# 
# myCol <- c("#8B0046", "#468B00")
# 
# venn.diagram(venn.x,
#              filename = 'output/Metabolomics/D52_BvsP_venn.png',
#              output = TRUE,
#              height = 480, 
#              width = 480,
#              resolution = 300,
#              category.names = c("Bleached", "Partial Mortality"),
#              lwd = 2,
#              col = c("#8B0046", "#468B00"),
#              fill = c(alpha("#8B0046",0.3), alpha("#468B00", 0.3)),
#              cex = 0.5,
#              fontfamily = "sans",
#              fontface = "bold",
#              cat.cex = 0.5,
#              cat.default.pos = "outer",
#              cat.pos = c(12, -12),
#              cat.dist = c(-0.45, -0.45),
#              cat.fontfamily = "sans",
#              cat.fontface = "bold"
#              )

T-test validation: Control vs Bleached

# Control vs Bleached

## Gather data frame and group by metabolite 
D52_CvB_gather <- D52_CvsB_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_CvB_gather$Treatment <- as.factor(D52_CvB_gather$Treatment)
D52_CvB_gather <- dplyr::group_by(D52_CvB_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvB_VIP_Select <- subset(D52_CvB_gather, D52_CvB_gather$Metabolite %in% D52_CvsB_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Control
D52_CvB_t.test <-do(D52_CvB_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_CvB_t.test$p.adj<-p.adjust(D52_CvB_t.test$p.value, method=c("fdr"), n=length(D52_CvsB_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_CvB_t.test.fdr <- D52_CvB_t.test %>% filter(p.adj <= 0.05)
length(D52_CvB_t.test.fdr$Metabolite) #10
## [1] 10
# # Filter for significantly different metabolites (p < 0.05)
# D52_CvB_t.test.sig <- D52_CvB_t.test %>% filter(p.value <= 0.05)
# length(D52_CvB_t.test.sig$Metabolite) #went from 32 to 10 with p value adjustment

# Filter for metabolites accumulated compared to control
D52_CvB_t.test.sig.up <- D52_CvB_t.test.fdr %>% filter(estimate1 > estimate2)

# Filter for metabolites depleted compared to control
D52_CvB_t.test.sig.down <- D52_CvB_t.test.fdr %>% filter(estimate1 < estimate2)

T-test validation: Control vs Partial Mortality

# Control vs Bleached

## Gather data frame and group by metabolite 
D52_CvsP_gather <- D52_CvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_CvsP_gather$Treatment <- as.factor(D52_CvsP_gather$Treatment)
D52_CvsP_gather <- dplyr::group_by(D52_CvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvsP_VIP_Select <- subset(D52_CvsP_gather, D52_CvsP_gather$Metabolite %in% D52_CvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Control_Ambient, estimate 2 = Bleached_Hot
D52_CvsP_t.test <-do(D52_CvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_CvsP_t.test$p.adj<-p.adjust(D52_CvsP_t.test$p.value, method=c("fdr"), n=length(D52_CvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_CvsP_t.test.fdr <- D52_CvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_CvsP_t.test.fdr$Metabolite) #34
## [1] 34
# Filter for significantly different metabolites (p < 0.05)
# D52_CvsP_t.test.sig <- D52_CvsP_t.test %>% filter(p.value <= 0.05)
# length(D52_CvsP_t.test.sig$Metabolite) #went from 38 to 34 with p value adjustment

# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

T-test validation: Bleached vs Partial Mortality

# Bleach vs Partial Mortality

## Gather data frame and group by metabolite 
D52_BvsP_gather <- D52_BvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
  dplyr::select("Treatment", "Metabolite", "Count")

D52_BvsP_gather$Treatment <- as.factor(D52_BvsP_gather$Treatment)
D52_BvsP_gather <- dplyr::group_by(D52_BvsP_gather, Metabolite)

# Select metabolites only present from the PLS-DA with VIPs >1
D52_BvsP_VIP_Select <- subset(D52_BvsP_gather, D52_BvsP_gather$Metabolite %in% D52_BvsP_VIP_1$Metabolite)

#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Mortality_Hot
D52_BvsP_t.test <-do(D52_BvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
                 alternative = "two.sided",
                 mu = 0,
                 paired = FALSE,
                 var.equal = FALSE,
                 conf.level = 0.95
                 )))

#adjust p value for the number of comparisons
D52_BvsP_t.test$p.adj<-p.adjust(D52_BvsP_t.test$p.value, method=c("fdr"), n=length(D52_BvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method

# Filter for significantly different metabolites (p < 0.05)
D52_BvsP_t.test.fdr <- D52_BvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_BvsP_t.test.fdr$Metabolite) #
## [1] 0
# Filter for significantly different metabolites (p < 0.05)
# D52_CvsP_t.test.sig <- D52_CvsP_t.test %>% filter(p.value <= 0.05)
# length(D52_CvsP_t.test.sig$Metabolite) #went from 38 to 34 with p value adjustment

# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)

# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)

VIP plotting after t-test validation

# Selecting metabolites that were validated by the t-test for each up and down accumulated
D52_CvsB_VIP_sig_up <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.up$Metabolite)
D52_CvsB_VIP_sig_down <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.down$Metabolite)

D52_CvsP_VIP_sig_up <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.up$Metabolite)
D52_CvsP_VIP_sig_down <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.down$Metabolite)


write.csv(D52_CvsB_VIP_sig_up, "../output/Metabolomics/D52_CvsB_VIP_sig_up.csv")
write.csv(D52_CvsB_VIP_sig_down, "../output/Metabolomics/D52_CvsB_VIP_sig_down.csv")
write.csv(D52_CvsP_VIP_sig_up, "../output/Metabolomics/D52_CvsP_VIP_sig_up.csv")
write.csv(D52_CvsP_VIP_sig_down, "../output/Metabolomics/D52_CvsP_VIP_sig_down.csv")

# Plotting

D52_CvsB_VIP_UP <- D52_CvsB_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("Accumulated") +
  xlab("") +
  ggtitle("Bleached") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D52_CvsB_VIP_DOWN <- D52_CvsB_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("Depleted") +
  xlab("VIP Score") +
#  ggtitle("Bleached: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 

D52_CvsP_VIP_UP <- D52_CvsP_VIP_sig_up %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
  ggtitle("Partial Mortality") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_CvsP_VIP_DOWN <- D52_CvsP_VIP_sig_down %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point() +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
                     panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

D52_All_VIP_plot <- plot_grid(D52_CvsB_VIP_UP, D52_CvsP_VIP_UP, D52_CvsB_VIP_DOWN, D52_CvsP_VIP_DOWN, 
                              align = "v", 
                              ncol = 2, 
                              rel_heights = c(0.95, 0.35), 
                              labels = c("A", "B", "C", "D"))

D52_All_VIP_plot

#ggsave(filename="../output/Metabolomics/Day52_VIP.pdf", plot=D52_All_VIP_plot, dpi=300, width=7, height=9, units="in")

VIP plotting after t-test validation and comparing overlapping metabolites

# Up regulated metabolites that overlap between B and P

D52_overlap_BvsP_up_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_overlap_BvsP_up_1)[1] <- 'Metabolite'
D52_overlap_BvsP_up_2 <- merge(D52_overlap_BvsP_up_1, D52_CvsB_VIP_sig_up, by="Metabolite")
D52_overlap_BvsP_up_VIP <- merge(D52_overlap_BvsP_up_2, D52_CvsP_VIP_sig_up, by="Metabolite")
names(D52_overlap_BvsP_up_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_up_VIP)[3] <- 'Partial Mortality'

D52_overlap_BvsP_up_VIP_comb <- gather(D52_overlap_BvsP_up_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')

# Up regulated metabolites that are unique to B

D52_uniqueB_BvsP_up <- as.data.frame(setdiff(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_uniqueB_BvsP_up)[1] <- 'Metabolite'
D52_uniqueB_BvsP_up_VIP <- merge(D52_uniqueB_BvsP_up, D52_CvsB_VIP_sig_up, by="Metabolite")

# Up regulated metabolites that are unique to P

D52_uniqueP_BvsP_up <- as.data.frame(setdiff(D52_CvsP_VIP_sig_up$Metabolite, D52_CvsB_VIP_sig_up$Metabolite))
names(D52_uniqueP_BvsP_up)[1] <- 'Metabolite'
D52_uniqueP_BvsP_up_VIP <- merge(D52_uniqueP_BvsP_up, D52_CvsP_VIP_sig_up, by="Metabolite")

# Down regulated metabolites that overlap between B and P

D52_overlap_BvsP_down_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_down$Metabolite, D52_CvsP_VIP_sig_down$Metabolite))
names(D52_overlap_BvsP_down_1)[1] <- 'Metabolite'
D52_overlap_BvsP_down_2 <- merge(D52_overlap_BvsP_down_1, D52_CvsB_VIP_sig_down, by="Metabolite")
D52_overlap_BvsP_down_VIP <- merge(D52_overlap_BvsP_down_2, D52_CvsP_VIP_sig_down, by="Metabolite")
names(D52_overlap_BvsP_down_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_down_VIP)[3] <- 'Partial Mortality'

D52_overlap_BvsP_down_VIP_comb <- gather(D52_overlap_BvsP_down_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')


# Down regulated metabolites that are unique to B 
# There are none 

# Down regulated metabolites that are unique to P

D52_uniqueP_BvsP_down <- as.data.frame(setdiff(D52_CvsP_VIP_sig_down$Metabolite, D52_CvsB_VIP_sig_down$Metabolite))
names(D52_uniqueP_BvsP_down)[1] <- 'Metabolite'
D52_uniqueP_BvsP_down_VIP <- merge(D52_uniqueP_BvsP_down, D52_CvsP_VIP_sig_down, by="Metabolite")


# Plotting

D52_uniqueB_BvsP_up_plot <- D52_uniqueB_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#8B0046") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_uniqueP_BvsP_up_plot <- D52_uniqueP_BvsP_up_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
#  ggtitle("Partial Mortality Unique") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_uniqueP_BvsP_down_plot <- D52_uniqueP_BvsP_down_VIP %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, color = "#468B00") +
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Partial Mortality: Depleted") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17))

D52_overlap_BvsP_up_plot <- D52_overlap_BvsP_up_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1.2, 2) +
  ylab("") +
  xlab("") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17),
                     legend.position = "none")

D52_overlap_BvsP_down_plot <- D52_overlap_BvsP_down_VIP_comb %>%
  arrange(VIP) %>%
  ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
  geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
  geom_point(size = 7, aes(color = Group))+
  scale_colour_manual(values=c("#8B0046", "#468B00")) +
  scale_fill_manual(values=c("#8B0046", "#468B00")) + 
  xlim(1.2, 2) +
  ylab("") +
  xlab("VIP Score") +
#  ggtitle("Bleached Overlap") +
  theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.text = element_text(size = 20),
                     legend.title = element_text(size = 22)) 


D52_All_VIP_plot <- D52_uniqueB_BvsP_up_plot +
                     D52_overlap_BvsP_up_plot +
                     D52_uniqueP_BvsP_up_plot +
                     guide_area() +
                     D52_overlap_BvsP_down_plot +
                     D52_uniqueP_BvsP_down_plot + 
                     plot_layout(ncol=3, heights=c(0.4,0.1), guides = "collect") 
 
ggsave(filename="../output/Metabolomics/Day52_VIP.png", plot=D52_All_VIP_plot, dpi=300, width=20, height=9, units="in")
Day_52_VIP

Day_52_VIP

Pathway Analysis

Day 52 Control vs Bleached

library(MetaboAnalystR)

# D52 Control vs Bleached UP Overrepresentation Analysis and  via Metaboanalyst 4.0

D52_CvsB_VIP_sig_up
D52_CvsB_VIP_sig_up$Metabolite[D52_CvsB_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"

mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsB_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine") #Replacing metabolite
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)

D52_CvsB_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsB_up_KEGG <- as.data.frame(D52_CvsB_up_dataset$map.table) #extracting KEGG terms

write.csv(D52_CvsB_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_KEGG.csv")

## PATHWAY ENRICHMENT

#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)

mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F) 
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters

D52_CvsB_up_analset_KEGG <- mSet_Path$api
D52_CvsB_up_PATHWAY <- as.data.frame(D52_CvsB_up_analset_KEGG$ora.results) #extracting KEGG terms

write.csv(D52_CvsB_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_PATHWAY.csv")

D52_CvsB_up_PATHWAY 

# PLOTTING

D52_CvsB_up_PATHWAY <- tibble::rownames_to_column(D52_CvsB_up_PATHWAY, "Pathway")
names(D52_CvsB_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsB_up_PATHWAY)[6] <- "neglogp"

logsig<- -log(0.05)

pointstolabel <- D52_CvsB_up_PATHWAY %>%
  filter(pvalue < 0.05)

D52_CvsB_up_PATHWAY_plot <- D52_CvsB_up_PATHWAY %>%
    mutate(color = ifelse(neglogp>logsig, "red", "grey")) %>%
    ggplot(aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color)) +
    geom_point(shape=21) +
    scale_color_identity() +
    scale_fill_identity() +
    geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
#    geom_text(data=subset(D52_CvsP_up_PATHWAY, neglogp>logsig), aes(Impact,neglogp,label=Pathway)) +
    ylab("-log(p)") +
    xlab("Pathway Impact") +
    theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.position="none") 

D52_CvsB_up_PATHWAY_plot

Day 52 Control vs Partial Mortality

# D52 Control vs Partial Mortality UP Overrepresentation Analysis and  via Metaboanalyst 4.0

D52_CvsP_VIP_sig_up
D52_CvsP_VIP_sig_up$Metabolite[D52_CvsP_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"

mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsP_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
# mSet<-PerformDetailMatch(mSet, "Acetyl Proline")
# mSet<-GetCandidateList(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine")
mSet<-PerformDetailMatch(mSet, "Pyrroline-5-carboxylic acid")
mSet<-GetCandidateList(mSet)
#mSet<-SetCandidate(mSet, "Pyrroline-5-carboxylic acid", "1-Pyrroline-5-carboxylic acid") #this isnt working for some reson
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)

D52_CvsP_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsP_up_KEGG <- as.data.frame(D52_CvsP_up_dataset$map.table) #extracting KEGG terms

write.csv(D52_CvsP_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_KEGG.csv")

## PATHWAY ENRICHMENT

#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)

mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F) 
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters

D52_CvsP_up_analset_KEGG <- mSet_Path$api
D52_CvsP_up_PATHWAY <- as.data.frame(D52_CvsP_up_analset_KEGG$ora.results) #extracting KEGG terms

write.csv(D52_CvsP_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_PATHWAY.csv")

D52_CvsP_up_PATHWAY 

# Plotting

D52_CvsP_up_PATHWAY <- tibble::rownames_to_column(D52_CvsP_up_PATHWAY, "Pathway")
names(D52_CvsP_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsP_up_PATHWAY)[6] <- "neglogp"

logsig<- -log(0.05)

pointstolabel <- D52_CvsP_up_PATHWAY %>%
  filter(pvalue < 0.05)

D52_CvsP_up_PATHWAY_plot <- D52_CvsP_up_PATHWAY %>%
    mutate(color = ifelse(neglogp>logsig, "red", "grey")) %>%
    ggplot(aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color)) +
    geom_point(shape=21) +
    scale_color_identity() +
    scale_fill_identity() +
    geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
#    geom_text(data=subset(D52_CvsP_up_PATHWAY, neglogp>logsig), aes(Impact,neglogp,label=Pathway)) +
    ylab("-log(p)") +
    xlab("Pathway Impact") +
    theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), 
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.text = element_text(size=17, color="black"), 
                     title = element_text(size=17, face="bold"), 
                     axis.title = element_text(size=17), 
                     legend.position="none") 

D52_CvsP_up_PATHWAY_plot